Leak mspdbsrv.exe processes on Windows
authorAlex Crichton <alex@alexcrichton.com>
Tue, 4 Oct 2016 22:59:37 +0000 (15:59 -0700)
committerAlex Crichton <alex@alexcrichton.com>
Tue, 4 Oct 2016 23:15:28 +0000 (16:15 -0700)
Instead of having our job object tear them down, instead leak them intentionally
if everything succeeded.

Closes #3161

Cargo.lock
Cargo.toml
src/bin/cargo.rs
src/cargo/util/job.rs

index 7dc67435fff40f915c16a87de783b3accbf9bdab..13449eb396a6239d5af4d96188ad2eec8b769395 100644 (file)
@@ -24,6 +24,7 @@ dependencies = [
  "miow 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "num_cpus 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "openssl 0.7.14 (registry+https://github.com/rust-lang/crates.io-index)",
+ "psapi-sys 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "regex 0.1.73 (registry+https://github.com/rust-lang/crates.io-index)",
  "rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
  "semver 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -270,7 +271,7 @@ dependencies = [
  "cmake 0.1.17 (registry+https://github.com/rust-lang/crates.io-index)",
  "gcc 0.3.32 (registry+https://github.com/rust-lang/crates.io-index)",
  "libc 0.2.15 (registry+https://github.com/rust-lang/crates.io-index)",
- "libssh2-sys 0.1.38 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libssh2-sys 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)",
  "libz-sys 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
  "openssl-sys 0.7.14 (registry+https://github.com/rust-lang/crates.io-index)",
  "pkg-config 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -286,7 +287,7 @@ dependencies = [
 
 [[package]]
 name = "libssh2-sys"
-version = "0.1.38"
+version = "0.1.39"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
  "cmake 0.1.17 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -483,6 +484,15 @@ dependencies = [
  "tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
+[[package]]
+name = "psapi-sys"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
+ "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
 [[package]]
 name = "rand"
 version = "0.3.14"
@@ -660,7 +670,7 @@ dependencies = [
 "checksum libc 0.2.15 (registry+https://github.com/rust-lang/crates.io-index)" = "23e3757828fa702a20072c37ff47938e9dd331b92fac6e223d26d4b7a55f7ee2"
 "checksum libgit2-sys 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "3293dc95169a6351c5a03eca4bf5549f3a9a06336a000315876ff1165a5fba10"
 "checksum libressl-pnacl-sys 2.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "cbc058951ab6a3ef35ca16462d7642c4867e6403520811f28537a4e2f2db3e71"
-"checksum libssh2-sys 0.1.38 (registry+https://github.com/rust-lang/crates.io-index)" = "49c845f8fad4f5761d1018dd0dba8ca49934cc7c97a8473ff20a2f181cda830c"
+"checksum libssh2-sys 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)" = "1debd7e56d19655eb786f827675dc55f6d530de6d7b81e76d13d1afc635d6c07"
 "checksum libz-sys 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "40f2df7730b5d29426c3e44ce4d088d8c5def6471c2c93ba98585b89fb201ce6"
 "checksum log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ab83497bf8bf4ed2a74259c1c802351fcd67a65baa86394b6ba73c36f4838054"
 "checksum matches 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "15305656809ce5a4805b1ff2946892810992197ce1270ff79baded852187942e"
@@ -682,6 +692,7 @@ dependencies = [
 "checksum openssl-sys-extras 0.7.14 (registry+https://github.com/rust-lang/crates.io-index)" = "11c5e1dba7d3d03d80f045bf0d60111dc69213b67651e7c889527a3badabb9fa"
 "checksum pkg-config 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "8cee804ecc7eaf201a4a207241472cc870e825206f6c031e3ee2a72fa425f2fa"
 "checksum pnacl-build-helper 1.4.10 (registry+https://github.com/rust-lang/crates.io-index)" = "61c9231d31aea845007443d62fcbb58bb6949ab9c18081ee1e09920e0cf1118b"
+"checksum psapi-sys 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "abcd5d1a07d360e29727f757a9decb3ce8bc6e0efa8969cfaad669a8317a2478"
 "checksum rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "2791d88c6defac799c3f20d74f094ca33b9332612d9aef9078519c82e4fe04a5"
 "checksum regex 0.1.73 (registry+https://github.com/rust-lang/crates.io-index)" = "56b7ee9f764ecf412c6e2fff779bca4b22980517ae335a21aeaf4e32625a5df2"
 "checksum regex-syntax 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "31040aad7470ad9d8c46302dcffba337bb4289ca5da2e3cd6e37b64109a85199"
index 6ced3b09d3a5d1be2087680244b936d2da70d5c0..083a8654efb8436b6d89b844523b659486343cfc 100644 (file)
@@ -27,14 +27,15 @@ filetime = "0.1"
 flate2 = "0.2"
 fs2 = "0.2"
 git2 = "0.4"
-libgit2-sys = "0.4"
 git2-curl = "0.5"
 glob = "0.2"
 kernel32-sys = "0.2"
 libc = "0.2"
+libgit2-sys = "0.4"
 log = "0.3"
 miow = "0.1"
 num_cpus = "1.0"
+psapi-sys = "0.1"
 regex = "0.1"
 rustc-serialize = "0.3"
 semver = "0.2.3"
index 2e4ceb0eb96b5baa6d9cf1a52739e8b15f1de2bc..fdcea48c89fd0f951b5e1f93a5a182f36897e9e0 100644 (file)
@@ -123,7 +123,7 @@ fn execute(flags: Flags, config: &Config) -> CliResult<Option<()>> {
                           flags.flag_locked));
 
     init_git_transports(config);
-    cargo::util::job::setup();
+    let _token = cargo::util::job::setup();
 
     if flags.flag_version {
         println!("{}", cargo::version());
index dee6d0bd7cb9f41b574bbcb601b5892bbf12cb53..06f51356d25076dd5e49ca080aa4cfd23210dacd 100644 (file)
@@ -15,7 +15,9 @@
 //! child will be associated with the job object as well. This means if we add
 //! ourselves to the job object we create then everything will get torn down!
 
-pub fn setup() {
+pub use self::imp::Setup;
+
+pub fn setup() -> Option<Setup> {
     unsafe { imp::setup() }
 }
 
@@ -24,7 +26,9 @@ mod imp {
     use std::env;
     use libc;
 
-    pub unsafe fn setup() {
+    pub type Setup = ();
+
+    pub unsafe fn setup() -> Option<()> {
         // There's a test case for the behavior of
         // when-cargo-is-killed-subprocesses-are-also-killed, but that requires
         // one cargo spawned to become its own session leader, so we do that
@@ -32,6 +36,7 @@ mod imp {
         if env::var("__CARGO_TEST_SETSID_PLEASE_DONT_USE_ELSEWHERE").is_ok() {
             libc::setsid();
         }
+        Some(())
     }
 }
 
@@ -39,10 +44,26 @@ mod imp {
 mod imp {
     extern crate kernel32;
     extern crate winapi;
+    extern crate psapi;
 
+    use std::ffi::OsString;
+    use std::io;
     use std::mem;
+    use std::os::windows::prelude::*;
+
+    pub struct Setup {
+        job: Handle,
+    }
 
-    pub unsafe fn setup() {
+    pub struct Handle {
+        inner: winapi::HANDLE,
+    }
+
+    fn last_err() -> io::Error {
+        io::Error::last_os_error()
+    }
+
+    pub unsafe fn setup() -> Option<Setup> {
         // Creates a new job object for us to use and then adds ourselves to it.
         // Note that all errors are basically ignored in this function,
         // intentionally. Job objects are "relatively new" in Windows,
@@ -54,8 +75,9 @@ mod imp {
 
         let job = kernel32::CreateJobObjectW(0 as *mut _, 0 as *const _);
         if job.is_null() {
-            return
+            return None
         }
+        let job = Handle { inner: job };
 
         // Indicate that when all handles to the job object are gone that all
         // process in the object should be killed. Note that this includes our
@@ -65,27 +87,174 @@ mod imp {
         info = mem::zeroed();
         info.BasicLimitInformation.LimitFlags =
             winapi::JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE;
-        let r = kernel32::SetInformationJobObject(job,
+        let r = kernel32::SetInformationJobObject(job.inner,
                         winapi::JobObjectExtendedLimitInformation,
                         &mut info as *mut _ as winapi::LPVOID,
                         mem::size_of_val(&info) as winapi::DWORD);
         if r == 0 {
-            kernel32::CloseHandle(job);
-            return
+            return None
         }
 
         // Assign our process to this job object, meaning that our children will
         // now live or die based on our existence.
         let me = kernel32::GetCurrentProcess();
-        let r = kernel32::AssignProcessToJobObject(job, me);
+        let r = kernel32::AssignProcessToJobObject(job.inner, me);
         if r == 0 {
-            kernel32::CloseHandle(job);
-            return
+            return None
+        }
+
+        Some(Setup { job: job })
+    }
+
+    impl Drop for Setup {
+        fn drop(&mut self) {
+            // This is a litte subtle. By default if we are terminated then all
+            // processes in our job object are terminated as well, but we
+            // intentionally want to whitelist some processes to outlive our job
+            // object (see below).
+            //
+            // To allow for this, we manually kill processes instead of letting
+            // the job object kill them for us. We do this in a loop to handle
+            // processes spawning other processes.
+            //
+            // Finally once this is all done we know that the only remaining
+            // ones are ourselves and the whitelisted processes. The destructor
+            // here then configures our job object to *not* kill everything on
+            // close, then closes the job object.
+            unsafe {
+                while self.kill_remaining() {
+                    info!("killed some, going for more");
+                }
+
+                let mut info: winapi::JOBOBJECT_EXTENDED_LIMIT_INFORMATION;
+                info = mem::zeroed();
+                let r = kernel32::SetInformationJobObject(
+                            self.job.inner,
+                            winapi::JobObjectExtendedLimitInformation,
+                            &mut info as *mut _ as winapi::LPVOID,
+                            mem::size_of_val(&info) as winapi::DWORD);
+                if r == 0 {
+                    info!("failed to configure job object to defaults: {}",
+                          last_err());
+                }
+            }
         }
+    }
+
+    impl Setup {
+        unsafe fn kill_remaining(&mut self) -> bool {
+            #[repr(C)]
+            struct Jobs {
+                header: winapi::JOBOBJECT_BASIC_PROCESS_ID_LIST,
+                list: [winapi::ULONG_PTR; 1024],
+            }
+
+            let mut jobs: Jobs = mem::zeroed();
+            let r = kernel32::QueryInformationJobObject(
+                            self.job.inner,
+                            winapi::JobObjectBasicProcessIdList,
+                            &mut jobs as *mut _ as winapi::LPVOID,
+                            mem::size_of_val(&jobs) as winapi::DWORD,
+                            0 as *mut _);
+            if r == 0 {
+                info!("failed to query job object: {}", last_err());
+                return false
+            }
+
+            let mut killed = false;
+            let list = &jobs.list[..jobs.header.NumberOfProcessIdsInList as usize];
+            assert!(list.len() > 0);
+            info!("found {} remaining processes", list.len() - 1);
+
+            let list = list.iter().filter(|&&id| {
+                // let's not kill ourselves
+                id as winapi::DWORD != kernel32::GetCurrentProcessId()
+            }).filter_map(|&id| {
+                // Open the process with the necessary rights, and if this
+                // fails then we probably raced with the process exiting so we
+                // ignore the problem.
+                let flags = winapi::PROCESS_QUERY_INFORMATION |
+                            winapi::PROCESS_TERMINATE |
+                            winapi::SYNCHRONIZE;
+                let p = kernel32::OpenProcess(flags,
+                                              winapi::FALSE,
+                                              id as winapi::DWORD);
+                if p.is_null() {
+                    None
+                } else {
+                    Some(Handle { inner: p })
+                }
+            }).filter(|p| {
+                // Test if this process was actually in the job object or not.
+                // If it's not then we likely raced with something else
+                // recycling this PID, so we just skip this step.
+                let mut res = 0;
+                let r = kernel32::IsProcessInJob(p.inner, self.job.inner, &mut res);
+                if r == 0 {
+                    info!("failed to test is process in job: {}", last_err());
+                    return false
+                }
+                res == winapi::TRUE
+            });
+
+
+            for p in list {
+                // Load the file which this process was spawned from. We then
+                // later use this for identification purposes.
+                let mut buf = [0; 1024];
+                let r = psapi::GetProcessImageFileNameW(p.inner,
+                                                        buf.as_mut_ptr(),
+                                                        buf.len() as winapi::DWORD);
+                if r == 0 {
+                    info!("failed to get image name: {}", last_err());
+                    continue
+                }
+                let s = OsString::from_wide(&buf[..r as usize]);
+                info!("found remaining: {:?}", s);
 
-        // Intentionally leak the `job` handle here. We've got the only
-        // reference to this job, so once it's gone we and all our children will
-        // be killed. This typically won't happen unless Cargo itself is
-        // ctrl-c'd.
+                // And here's where we find the whole purpose for this
+                // function!  Currently, our only whitelisted process is
+                // `mspdbsrv.exe`, and more details about that can be found
+                // here:
+                //
+                //      https://github.com/rust-lang/rust/issues/33145
+                //
+                // The gist of it is that all builds on one machine use the
+                // same `mspdbsrv.exe` instance. If we were to kill this
+                // instance then we could erroneously cause other builds to
+                // fail.
+                if let Some(s) = s.to_str() {
+                    if s.contains("mspdbsrv") {
+                        info!("\toops, this is mspdbsrv");
+                        continue
+                    }
+                }
+
+                // Ok, this isn't mspdbsrv, let's kill the process. After we
+                // kill it we wait on it to ensure that the next time around in
+                // this function we're not going to see it again.
+                let r = kernel32::TerminateProcess(p.inner, 1);
+                if r == 0 {
+                    info!("\tfailed to kill subprocess: {}", last_err());
+                    info!("\tassuming subprocess is dead...");
+                } else {
+                    info!("\tterminated subprocess");
+                }
+                let r = kernel32::WaitForSingleObject(p.inner, winapi::INFINITE);
+                if r != 0 {
+                    info!("failed to wait for process to die: {}", last_err());
+                    return false
+                }
+                killed = true;
+            }
+
+            return killed
+        }
+    }
+
+    impl Drop for Handle {
+        fn drop(&mut self) {
+            unsafe { kernel32::CloseHandle(self.inner); }
+        }
     }
 }